# =============================================================================
# This code is intended to show how the sample illustrating the Safegraph data was created.
#   A sufficient subset of the code used to clean the data is provided. The code used to read and organize the full Safegraph data is not provided.
#   The raw data is proprietary and will not be provided. 
#   The output of this code is provided.
#   NOTE: THE PATTERN DATA IS A RANDOMIZED VERSION OF A SLICE. ANALYSIS WITH THIS SAMPLE IS NOT FEASIBLE AND RESULTS WILL NOT BE INTERPRETABLE.
# =============================================================================

# Load the two full source tables from their Stata files.
# NOTE(review): the file header talks about Safegraph data, while these paths
# point at Clarity Tradelines/Inquiries files -- confirm which one is intended.
_tradelines_dta = r'D:\jodo_emga\Plasma\Data\Clarity\Clarity_Raw2Clean\Tradelines.dta'
_inquiries_dta = r'D:\jodo_emga\Plasma\Data\Clarity\Clarity_Raw2Clean\Inquiries.dta'
ct = pd.read_stata(_tradelines_dta)
ci = pd.read_stata(_inquiries_dta)

# Build the samples; the data-preparation steps themselves are illustrated in
# core1 and core2. `resample` is a helper defined outside this section --
# presumably it draws a random sample of size n from df (TODO confirm).
_sample_n = 100000
ct_raw_sample = resample(df=ct, n=_sample_n)
ci_raw_sample = resample(df=ci, n=_sample_n)

# Write each sample as a parquet file under the directory stored in
# cdd['p_d_cl'] (a mapping defined outside this section).
_ct_sample_out = cdd['p_d_cl'] + r'\Sample\ct_raw_sample.parquet'
ct_raw_sample.to_parquet(_ct_sample_out)
_ci_sample_out = cdd['p_d_cl'] + r'\Sample\ci_raw_sample.parquet'
ci_raw_sample.to_parquet(_ci_sample_out)


